{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "fa17dfe5",
   "metadata": {},
   "source": [
    "<a href=\"https://colab.research.google.com/github/run-llama/llama_index/blob/main/docs/examples/vector_stores/chroma_metadata_filter.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "307804a3-c02b-4a57-ac0d-172c30ddc851",
   "metadata": {},
   "source": [
    "# Chroma Vector Store"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "4d8b4131",
   "metadata": {},
   "source": [
    "If you're opening this Notebook on colab, you will probably need to install LlamaIndex 🦙."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cfdfbf42",
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install llama-index-vector-stores-chroma"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "06ad4191",
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install llama-index"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "f7010b1d-d1bb-4f08-9309-a328bb4ea396",
   "metadata": {},
   "source": [
    "#### Creating a Chroma Index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d48af8e1",
   "metadata": {},
   "outputs": [],
   "source": [
    "import logging\n",
    "import sys\n",
    "\n",
    "logging.basicConfig(stream=sys.stdout, level=logging.INFO)\n",
    "logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "898a8d42",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import getpass\n",
    "\n",
    "# os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n",
    "import openai\n",
    "\n",
    "openai.api_key = \"sk-\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0ce3143d-198c-4dd2-8e5a-c5cdf94f017a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import chromadb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "667f3cb3-ce18-48d5-b9aa-bfc1a1f0f0f6",
   "metadata": {},
   "outputs": [],
   "source": [
    "chroma_client = chromadb.EphemeralClient()\n",
    "chroma_collection = chroma_client.create_collection(\"quickstart\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0a2bcc07",
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.core import VectorStoreIndex\n",
    "from llama_index.vector_stores.chroma import ChromaVectorStore\n",
    "from IPython.display import Markdown, display"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "68cbd239-880e-41a3-98d8-dbb3fab55431",
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.core.schema import TextNode\n",
    "\n",
    "nodes = [\n",
    "    TextNode(\n",
    "        text=\"The Shawshank Redemption\",\n",
    "        metadata={\n",
    "            \"author\": \"Stephen King\",\n",
    "            \"theme\": \"Friendship\",\n",
    "            \"year\": 1994,\n",
    "        },\n",
    "    ),\n",
    "    TextNode(\n",
    "        text=\"The Godfather\",\n",
    "        metadata={\n",
    "            \"director\": \"Francis Ford Coppola\",\n",
    "            \"theme\": \"Mafia\",\n",
    "            \"year\": 1972,\n",
    "        },\n",
    "    ),\n",
    "    TextNode(\n",
    "        text=\"Inception\",\n",
    "        metadata={\n",
    "            \"director\": \"Christopher Nolan\",\n",
    "            \"theme\": \"Fiction\",\n",
    "            \"year\": 2010,\n",
    "        },\n",
    "    ),\n",
    "    TextNode(\n",
    "        text=\"To Kill a Mockingbird\",\n",
    "        metadata={\n",
    "            \"author\": \"Harper Lee\",\n",
    "            \"theme\": \"Mafia\",\n",
    "            \"year\": 1960,\n",
    "        },\n",
    "    ),\n",
    "    TextNode(\n",
    "        text=\"1984\",\n",
    "        metadata={\n",
    "            \"author\": \"George Orwell\",\n",
    "            \"theme\": \"Totalitarianism\",\n",
    "            \"year\": 1949,\n",
    "        },\n",
    "    ),\n",
    "    TextNode(\n",
    "        text=\"The Great Gatsby\",\n",
    "        metadata={\n",
    "            \"author\": \"F. Scott Fitzgerald\",\n",
    "            \"theme\": \"The American Dream\",\n",
    "            \"year\": 1925,\n",
    "        },\n",
    "    ),\n",
    "    TextNode(\n",
    "        text=\"Harry Potter and the Sorcerer's Stone\",\n",
    "        metadata={\n",
    "            \"author\": \"J.K. Rowling\",\n",
    "            \"theme\": \"Fiction\",\n",
    "            \"year\": 1997,\n",
    "        },\n",
    "    ),\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ba1558b3",
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index.core import StorageContext\n",
    "\n",
    "\n",
    "vector_store = ChromaVectorStore(chroma_collection=chroma_collection)\n",
    "storage_context = StorageContext.from_defaults(vector_store=vector_store)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "35369eda",
   "metadata": {},
   "outputs": [],
   "source": [
    "index = VectorStoreIndex(nodes, storage_context=storage_context)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3c35501f",
   "metadata": {},
   "source": [
    "## One Exact Match Filter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2149b1e0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[NodeWithScore(node=TextNode(id_='f343294f-4cd5-4f1c-acbf-19490aa95efb', embedding=None, metadata={'director': 'Francis Ford Coppola', 'theme': 'Mafia', 'year': 1972}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='79563896e320da86be371351f55d903acdcfb3229368a6622f6be6e929e8b7cc', text='The Godfather', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n'), score=0.6215522669166147),\n",
       " NodeWithScore(node=TextNode(id_='7910d5cd-7871-46e5-b71a-0dae1797aee1', embedding=None, metadata={'author': 'Harper Lee', 'theme': 'Mafia', 'year': 1960}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='0a1875c24455356c77eedd8eddd39035ec622959b59d2296eff56d42019a0c00', text='To Kill a Mockingbird', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n'), score=0.5873631114046581)]"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from llama_index.core.vector_stores import (\n",
    "    MetadataFilter,\n",
    "    MetadataFilters,\n",
    "    FilterOperator,\n",
    ")\n",
    "\n",
    "\n",
    "filters = MetadataFilters(\n",
    "    filters=[\n",
    "        MetadataFilter(key=\"theme\", operator=FilterOperator.EQ, value=\"Mafia\"),\n",
    "    ]\n",
    ")\n",
    "\n",
    "retriever = index.as_retriever(filters=filters)\n",
    "retriever.retrieve(\"What is inception about?\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8d44233b",
   "metadata": {},
   "source": [
    "## Multiple Exact Match Metadata Filters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "150e1e83",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[NodeWithScore(node=TextNode(id_='f343294f-4cd5-4f1c-acbf-19490aa95efb', embedding=None, metadata={'director': 'Francis Ford Coppola', 'theme': 'Mafia', 'year': 1972}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='79563896e320da86be371351f55d903acdcfb3229368a6622f6be6e929e8b7cc', text='The Godfather', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n'), score=0.6215522669166147)]"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from llama_index.core.vector_stores import ExactMatchFilter, MetadataFilters\n",
    "\n",
    "\n",
    "filters = MetadataFilters(\n",
    "    filters=[\n",
    "        MetadataFilter(key=\"theme\", value=\"Mafia\"),\n",
    "        MetadataFilter(key=\"year\", value=1972),\n",
    "    ]\n",
    ")\n",
    "\n",
    "retriever = index.as_retriever(filters=filters)\n",
    "retriever.retrieve(\"What is inception about?\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7911415f",
   "metadata": {},
   "source": [
    "## Multiple Metadata Filters with `AND` condition"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "47bde992",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[NodeWithScore(node=TextNode(id_='b71ce5e8-353e-42c6-94b3-d0a11370aaba', embedding=None, metadata={'director': 'Christopher Nolan', 'theme': 'Fiction', 'year': 2010}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='110b4ab08da17685bdc3d53aecf6085a535dd00a43612eed991bce8074aa36a9', text='Inception', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n'), score=0.6250006485226994)]"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from llama_index.core.vector_stores import FilterOperator, FilterCondition\n",
    "\n",
    "\n",
    "filters = MetadataFilters(\n",
    "    filters=[\n",
    "        MetadataFilter(key=\"theme\", value=\"Fiction\"),\n",
    "        MetadataFilter(key=\"year\", value=1997, operator=FilterOperator.GT),\n",
    "    ],\n",
    "    condition=FilterCondition.AND,\n",
    ")\n",
    "\n",
    "retriever = index.as_retriever(filters=filters)\n",
    "retriever.retrieve(\"Harry Potter?\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "00471f4b",
   "metadata": {},
   "source": [
    "## Multiple Metadata Filters with `OR` condition"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d67e1bb7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n",
      "HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[NodeWithScore(node=TextNode(id_='6b0e9499-9f4d-4637-ab2a-460e5c870948', embedding=None, metadata={'author': 'J.K. Rowling', 'theme': 'Fiction', 'year': 1997}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='a2656c2bc96ed472bb0ed3ea81075042e9860987f3156428789d07079e019ed0', text=\"Harry Potter and the Sorcerer's Stone\", start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n'), score=0.7405548668973673),\n",
       " NodeWithScore(node=TextNode(id_='b71ce5e8-353e-42c6-94b3-d0a11370aaba', embedding=None, metadata={'director': 'Christopher Nolan', 'theme': 'Fiction', 'year': 2010}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='110b4ab08da17685bdc3d53aecf6085a535dd00a43612eed991bce8074aa36a9', text='Inception', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n'), score=0.6250006485226994)]"
      ]
     },
     "execution_count": null,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from llama_index.core.vector_stores import FilterOperator, FilterCondition\n",
    "\n",
    "\n",
    "filters = MetadataFilters(\n",
    "    filters=[\n",
    "        MetadataFilter(key=\"theme\", value=\"Fiction\"),\n",
    "        MetadataFilter(key=\"year\", value=1997, operator=FilterOperator.GT),\n",
    "    ],\n",
    "    condition=FilterCondition.OR,\n",
    ")\n",
    "\n",
    "retriever = index.as_retriever(filters=filters)\n",
    "retriever.retrieve(\"Harry Potter?\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3"
  },
  "vscode": {
   "interpreter": {
    "hash": "0ac390d292208ca2380c85f5bce7ded36a7a25670a97c40b8009630eb36cb06e"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
